# PREPARE
library(phyloseq)
library(ggpubr) # a handy helper package for ggplots
theme_set(theme_bw()) # setting the theme for ggplots
library(microbiome)
library(tidyverse)
library(DT)
`%notin%` <- Negate(`%in%`)
# MICROBIAL DATA
# reading in a previously saved phyloseq object. This contains microbial 16S-ASV abundances of all samples.
ps_1C <- readRDS("./data/ps_215samplesJul24")
# ps_1C
#phyloseq-class experiment-level object
#otu_table() OTU Table: [ 17961 taxa and 215 samples ]
#sample_data() Sample Data: [ 215 samples by 48 sample variables ]
#tax_table() Taxonomy Table: [ 17961 taxa by 7 taxonomic ranks ]
#phy_tree() Phylogenetic Tree: [ 17961 tips and 17822 internal nodes ]
### Pre-filtering microbial data
ps.flt <- prune_taxa(taxa_sums(ps_1C) >= 10, ps_1C) #m inimum reads per ASV
#phyloseq-class experiment-level object
#otu_table() OTU Table: [ 5855 taxa and 215 samples ]
#sample_data() Sample Data: [ 215 samples by 60 sample variables ]
#tax_table() Taxonomy Table: [ 5855 taxa by 7 taxonomic ranks ]
#phy_tree() Phylogenetic Tree: [ 5855 tips and 5763 internal nodes ]
# MASTERDATA
# change this path to where you store the .csv file. you can provide an absolute path like this. Check the path syntax for Windows as it might be different.
masterdata <- read.csv("./data/masterdataProject1C_May24.csv")
## Change date to date format in R
masterdata <- masterdata %>%
mutate(Date = lubridate::dmy(as.character(Date)) )
# Make AD, Treatment and SludgeType = factors
masterdata$AD <- factor(masterdata$AD, levels = c("AD1", "AD2","AD3", "AD4","AD5", "AD6", "Full-scale", "PSTWAS"))
masterdata$SludgeType <- factor(masterdata$SludgeType, levels = c("Control", "Treatment", "Foam", "Full-scale", "PSTWAS"))
masterdata$Treatment <- factor(masterdata$Treatment, levels = c("Control", "Treatment", "Full-scale", "PSTWAS"))
masterdata$Period <- factor(masterdata$Period, levels = c("Converging", "SteadyState", "Glycerol", "Inhibition", "Recovery/Feedingpause", "Recovery/Foaming", "Recovery/Postfoam", "SteadyState/Postfoam"))
# LOAD REFLECTANCE DATA
ATR <- read.csv("./data/ADVATR.csv")[-1,]
ATR <- ATR %>% rownames_to_column("ID")%>% column_to_rownames("X") %>% dplyr::select(-ID) %>%
mutate(across(S537_AD1:S1370_AD6, as.numeric))
# OPTION 1: raw
ATR <- as.matrix(ATR)
head(ATR) %>% datatable(caption = "ATR raw")
# OPTION 2: clr transformed
ATRclr <- compositions::clr(ATR)
head(ATRclr) %>% datatable(caption = "ATR clr")
# CREATE METADATA FOR 24 SELECTED SAMPLES AND MATCH SAMPLE NAMES WITH SPECTRA DF
filtervec <- c("2023-05-01", "2023-05-15", "2023-06-07","2023-06-12") # 12thJuneDNA = 13th June EPS
filtervec2 <- c("AD1", "AD2", "AD3", "AD4", "AD5","AD6") # check
metadata <- masterdata %>%
dplyr::filter(Date %in% filtervec &
AD %in% filtervec2)
metadata$SampleID.EPS <- colnames(ATR)
metadata <- metadata %>% column_to_rownames("SampleID.EPS")
head(metadata ) %>% datatable(caption = "metadata for 24 selected samples")
# CREATE A PHYLOSEQ OBJECTS (combined metadata and spectra)
psATR <-phyloseq(
otu_table(ATR, taxa_are_rows = T),
sample_data(metadata)
)
psATRclr <-phyloseq(
otu_table(ATRclr, taxa_are_rows = T),
sample_data(metadata)
)
# ABSORBANCE DATA
AVE <- read.csv("./data/AVE.csv")[-1,]
AVE <- AVE %>% rownames_to_column("ID")%>% column_to_rownames("X") %>% dplyr::select(-ID) %>%
mutate(across(S537_AD1:S1370_AD6, as.numeric))
# OPTION 1: raw
AVE <- as.matrix(AVE)
# OPTION 2: clr transformed
AVEclr <- compositions::clr(AVE)
# head(AVEclr)
# CREATE A PHYLOSEQ OBJECTS (combined metadata and spectra)
psAVE <-phyloseq(
otu_table(AVE, taxa_are_rows = T),
sample_data(metadata)
)
psAVEclr <-phyloseq(
otu_table(AVEclr, taxa_are_rows = T),
sample_data(metadata)
)
This is just for you to see variability of performance of individual ADs.
# Daily from masterfile
nrow(masterdata %>%
dplyr::filter(Gasflow_mL > 20000)) # outlier
## [1] 2
firstdate <- "2023-03-01"
lastdate <- "2023-08-15"
masterdata %>%
dplyr::filter(Gasflow_mL >= 0 &
Gasflow_mL < 20000) %>%
dplyr::filter(Date != c("2023-06-08")) %>% # outlier as the heaters turned off that day
# dplyr::filter(AD != "AD4") %>% #AD4 always behaved differently due to the lack of pre-mixing
mutate(Gasflow_L = Gasflow_mL / 1000) %>%
ggline(x = "Date",
y = "Gasflow_L",
color = "AD",
legend = "right",
add = "mean_se",
add.params = list(width = 0.75)) +
#scale_color_manual("Sludge Type", values = cols) +
theme_light() +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 45, hjust=1),
legend.position = c(0.90, 0.82)
#legend.background=element_rect(fill = alpha("white", 0.5))
) +
scale_x_date(date_breaks = "1 week", date_labels = "%b %d",
date_minor_breaks = "1 day",
limits = as.Date(c(firstdate, lastdate)) ) +
annotate("text",x = as.Date("2023-04-21"), y = -1, label = "W08 ", size = 2.5) + #Week 08
annotate("text",x = as.Date("2023-04-28"), y = -1, label = "W09 ", size = 2.5) + #Week 09
annotate("text",x = as.Date("2023-05-05"), y = -1, label = "W10 ", size = 2.5) + #Week 10
annotate("text",x = as.Date("2023-05-12"), y = -1, label = "W11 ", size = 2.5) + #Week 11
annotate("text",x = as.Date("2023-05-19"), y = -1, label = "W12 ", size = 2.5) + #Week 12
annotate("text",x = as.Date("2023-05-26"), y = -1, label = "W13 ", size = 2.5) + #Week 13
annotate("text",x = as.Date("2023-06-02"), y = -1, label = "W14 ", size = 2.5) + #Week 14
annotate("text",x = as.Date("2023-06-09"), y = -1, label = "W15 ", size = 2.5) + #Week 15
annotate("text",x = as.Date("2023-06-16"), y = -1, label = "W16 ", size = 2.5) + #Week 16
annotate("text",x = as.Date("2023-06-23"), y = -1, label = "W17 ", size = 2.5) + #Week 17
annotate("text",x = as.Date("2023-06-30"), y = -1, label = "W18 ", size = 2.5) + #Week 18
annotate("text",x = as.Date("2023-04-24"), y = -.4, label = "Steady state", size = 2.5) +
annotate("segment", y = 0, yend = 0, x = as.Date("2023-04-17"), xend = as.Date("2023-05-02"),
arrow = arrow(ends = "last", angle = 90, length = unit(.1,"cm"))) +
annotate("text",x = as.Date("2023-05-05"), y = -.4, label = "Glycerol ", size = 2.5) +
annotate("segment", y = 0, yend = 0, x = as.Date("2023-05-02"), xend = as.Date("2023-05-07"),
arrow = arrow(ends = "both", angle = 90, length = unit(.1,"cm"))) +
annotate("text",x = as.Date("2023-05-14"), y = -0.4, label = " Inhibition", size = 2.5) +
annotate("segment", y = 0, yend = 0, x = as.Date("2023-05-07"), xend = as.Date("2023-05-22"),
arrow = arrow(ends = "both", angle = 90, length = unit(.1,"cm"))) +
annotate("text",x = as.Date("2023-05-26"), y = -0.4, label = "Recovery", size = 2.5) +
annotate("segment", y = 0, yend = 0, x = as.Date("2023-05-22"), xend = as.Date("2023-05-30"),
arrow = arrow(ends = "both", angle = 90, length = unit(.1,"cm"))) +
annotate("text",x = as.Date("2023-06-03"), y = -0.4, label = "Foaming ", size = 2.5) +
annotate("segment", y = 0, yend = 0, x = as.Date("2023-05-30"), xend = as.Date("2023-06-07"),
arrow = arrow(ends = "both", angle = 90, length = unit(.1,"cm"))) +
annotate("text",x = as.Date("2023-06-10"), y = -0.4, label = " Recovery", size = 2.5) +
annotate("segment", y = 0, yend = 0, x = as.Date("2023-06-07"), xend = as.Date("2023-06-15"),
arrow = arrow(ends = "both", angle = 90, length = unit(.1,"cm"))) +
annotate("text",x = as.Date("2023-05-25"), y = 4, label = "Paused feeding", size = 2.5, angle = 90, alpha = 0.6) +
annotate("rect", ymin = 0, ymax = 6, xmin = as.Date("2023-05-22"), xmax = as.Date("2023-05-29"),
fill = "blue", alpha = .1) +
annotate("text",x = as.Date("2023-06-03"), y = 7, label = "Foam", size = 2.5, angle = 90, alpha = 0.6) +
annotate("rect", ymin = 4.5, ymax = 8, xmin = as.Date("2023-06-01"), xmax = as.Date("2023-06-06"),
fill = "blue", alpha = .1) +
annotate("text",x = as.Date("2023-06-24"), y = -.4, label = "Steady state post foam", size = 2.5) +
annotate("segment", y = 0, yend = 0, x = as.Date("2023-06-07"), xend = as.Date("2023-07-03"),
arrow = arrow(ends = "first", angle = 90, length = unit(.1,"cm"))) +
ylab(expression(paste("Gasflow (L day"^-1*")")))
#ggsave("./Figures/Sup_lineplot_gasdaily_indADs.png", height=15, width=25, units='cm', dpi=300)
Just to see how microbial 16S abundances vary among the selected
samples. We expect treated/inhibited samples (15th May) to be different.
library(factoextra)
library(compositions)
my_comparisons <- list(c(1, 2))
symbolsize <- 3
ps.temp <- psATR
pca <- prcomp(otu_table(ps.temp), scale = FALSE, center = TRUE)
# summary(otu_table(ps.temp))
# head(otu_table(ps.temp))
data <- get_pca(pca)
cord <- data$coord # extract sample coordinates of PC
cor <- data$cor
cos2 <- data$cos2
contrib <- data$contrib # contributions of variables
# Combine
df.tmp <- (data.frame(cord) %>% rownames_to_column("ID")) %>% left_join(sample_data(ps.temp) %>% rownames_to_column("ID") )
# compare_means(Dim.1 ~ Treatment, method = "t.test", df.tmp) #0.315
# compare_means(Dim.2 ~ Treatment, method = "t.test", df.tmp) #0.908
# compare_means(Dim.3 ~ Treatment, method = "t.test", df.tmp) #0.0075 **
# compare_means(Dim.4 ~ Treatment, method = "t.test", df.tmp) #0.205
Assessing if there were significant differences in the variation of
Principal Components (PCs) between groups (control and treatment)
library(factoextra)
library(compositions)
ps.temp <- psAVE
# summary(otu_table(ps.temp))
pca <- prcomp(data.frame(otu_table(ps.temp)), scale = FALSE, center = TRUE)
# otu_table(ps.temp)
data <- get_pca(pca)
cord <- data$coord
cor <- data$cor
cos2 <- data$cos2
contrib <- data$contrib
df.tmp <- (data.frame(cord) %>% rownames_to_column("ID")) %>%
left_join(sample_data(ps.temp) %>% rownames_to_column("ID") )
# compare_means(Dim.1 ~ Treatment, method = "t.test", df.tmp) #0.133
# compare_means(Dim.2 ~ Treatment, method = "t.test", df.tmp) #0.397
# compare_means(Dim.3 ~ Treatment, method = "t.test", df.tmp) #0.026 *
# compare_means(Dim.4 ~ Treatment, method = "t.test", df.tmp) #0.502
Assessing if there were significant differences in the variation of Principal Components (PCs) between groups (control and treatment)
library(factoextra)
library(compositions)
ps.temp <- psATRclr
pca <- prcomp(otu_table(ps.temp), scale = FALSE, center = FALSE)
# summary(otu_table(ps.temp))
# otu_table(ps.temp)
data <- get_pca(pca)
cord <- data$coord
cor <- data$cor
cos2 <- data$cos2
contrib <- data$contrib
df.tmp <- (data.frame(cord) %>% rownames_to_column("ID")) %>% left_join(sample_data(ps.temp) %>% rownames_to_column("ID") )
# compare_means(Dim.1 ~ Treatment, method = "t.test", df.tmp) #0.281
# compare_means(Dim.2 ~ Treatment, method = "t.test", df.tmp) #0.627
# compare_means(Dim.3 ~ Treatment, method = "t.test", df.tmp) #0.00117 **
# compare_means(Dim.4 ~ Treatment, method = "t.test", df.tmp) # 0.518
Assessing if there were significant differences in the variation of Principal Components (PCs) between groups (control and treatment)
library(factoextra)
library(compositions)
my_comparisons <- list(c(1, 2))
ps.temp <- psAVEclr
# summary(otu_table(ps.temp))
pca <- prcomp(data.frame(otu_table(ps.temp)), scale = FALSE, center = FALSE) # FALSE FOR BOTH - CLR data should already be centred and scaled.
# otu_table(ps.temp)
data <- get_pca(pca)
cord <- data$coord
cor <- data$cor
cos2 <- data$cos2
contrib <- data$contrib
df.tmp <- (data.frame(cord) %>% rownames_to_column("ID")) %>% left_join(sample_data(ps.temp) %>% rownames_to_column("ID") )
# compare_means(Dim.1 ~ Treatment, method = "t.test", df.tmp) #0.172
# compare_means(Dim.2 ~ Treatment, method = "t.test", df.tmp) #0.708
# compare_means(Dim.3 ~ Treatment, method = "t.test", df.tmp) #0.0228 *
# compare_means(Dim.4 ~ Treatment, method = "t.test", df.tmp) #0.613
Assessing if there were significant differences in the variation of Principal Components (PCs) between groups (control and treatment)
Unsure why the clr-transformed ATR spectra resulted in such different PC3 contributions compared to clr-transformed AVE spectra.
This needs further assessments/regression etc as it was done without any consideration to data distribution / validity.
library(ggcorrplot)
# Compute a correlation matrix
df.cor <- df.tmp %>% filter(ID != "S963_AD6") %>% # filter this sample as it has NAs
dplyr::select(Dim.1, Dim.2, Dim.3, Dim.4, Dim.5, Gasflow_mL, C, H_pct, N_pct, CN, HC, Ethanol:Hexanoic.acid, ngmL_qb, DNA_mg_gCOD)
corr <- round(cor(df.cor, method = "spearman"), 1)
p <- ggcorrplot(corr)
p + ggtitle("AVE clr correlations to variables")
#ggsave(plot = p, "./Figures/PC3correlations_clr.png", height=18, width=18, units='cm', dpi=300)